*---------------------------------------FORMAT_AA_SALE_ADDRESSES.DO-----------------------------------------
*This script prepares an address table of AA sales for use with GIS software to map addresses to neighborhoods.

*Sebastien Bradley 6/10/10

clear all
capture cd "C:/Users/Sebastien/Documents/Research/PropertyTaxes/Ann_Arbor/AA_proptaxdata19972008"
set memory 64m
capture log close
set more off

capture use AA_data.dta, clear

rename propaddresscombined address

*Add city and zip code information to property addresses from owner information, where possible:
#delimit ;
gen city = "ANN ARBOR";
gen state = "MI";

/*
replace ownercity = upper(ownercity);
gen zip = substr(ownerzipcode,1,5) if address==ownerstreetaddress;

*Many owner addresses have missing street types, so match on street number and name;
replace zip = substr(ownerzipcode,1,5) if address~=ownerstreetaddress & ownercity=="ANN ARBOR" & 
	word(address,1)==word(ownerstreetaddress,1) & word(address,2)==word(ownerstreetaddress,2);
*In fact, visual inspection shows that matching only on street number is accurate is all but one case;
replace zip = substr(ownerzipcode,1,5) if address~=ownerstreetaddress & ownercity=="ANN ARBOR" & 
	word(address,1)==word(ownerstreetaddress,1);
replace zip = "" if address=="5 ROCKLAND CT";
*In an additional large number of cases, owner addresses have neighboring street numbers (often times
*with a single owner for many properties).  Assuming zip codes do not separate properties located on the
*same street;
replace zip = substr(ownerzipcode,1,5) if address~=ownerstreetaddress & ownercity=="ANN ARBOR" & 
	word(address,2)==word(ownerstreetaddress,2) & word(address,3)==word(ownerstreetaddress,3);
	
*Repeat for properties with missing owner city and state information but where street addresses match;
replace zip = substr(ownerzipcode,1,5) if ownercity=="" & ownerstate=="" & zip=="" & ownerzipcode~="" & 
	word(address,1)==word(ownerstreetaddress,1) & word(address,2)==word(ownerstreetaddress,2);
*Matching only on street number is valid;
replace zip = substr(ownerzipcode,1,5) if ownercity=="" & ownerstate=="" & zip=="" & ownerzipcode~="" & 
	word(address,1)==word(ownerstreetaddress,1);
*Neighboring owners;
replace zip = substr(ownerzipcode,1,5) if ownercity=="" & ownerstate=="" & zip=="" & ownerzipcode~="" & 
	word(address,2)==word(ownerstreetaddress,2) & word(address,3)==word(ownerstreetaddress,3);	

destring zip, replace;
*/
	
*Miscellaneous;
replace address = subinword(address,"CRS","CRES",1);		/*GIS address locator uses different abbreviations*/
replace address = subinword(address,"CRESCENT","CRES",1);
replace address = subinword(address,"LA","LN",1) if word(address,-1)=="LA"; 	/*Only replace street type, not street name*/
replace address = subinword(address,"LANE","LN",1);
replace address = subinword(address,"BLV","BLVD",1);
replace address = subinword(address,"AV","AVE",1);
replace address = subinword(address,"AVENUE","AVE",1);
replace address = subinstr(address,"EMBER WAY","EMERALD AVE",1);
replace address = subinstr(address,"BEACON HILL ST","BEACON HILL",1);
	
#delimit cr


*Reduce dataset to unique addresses
sort pid address
drop if pid==pid[_n-1]


keep pid address city state /*zip*/ class				/*Geocode without zip codes*/
order pid address city state /*zip*/ class
outsheet using AA_sale_addresses.txt, names replace

/**/
